********************************************************************************
** Monte Carlo
** The do file has the following arguments:
** 1) lRandomizationSortingType : whether and how to sort into groups for treatment 
** 2) lRandomizationTreatmentType : how to randomize treatment
** 3) lIncludeDutch : whether to keep the Dutch-language observations (1) or drop them (0)
****************************************************************

********************************************************************************
** Positional arguments of this do file
**   lRandomizationSortingType   : 1 = form location groups after sorting by vNumberOfTourists,
**                                 2 = form location groups directly on the bootstrap location id,
**                                 3 = do not form location groups
**   lRandomizationTreatmentType : 1 = each city treated in 2 languages, 2 = completely random, 3 = half of the cities treated
**   lIncludeDutch               : 0 = drop the Dutch (nl) observations, 1 = keep them
args lRandomizationSortingType lRandomizationTreatmentType lIncludeDutch

** Fix the random-number seed, so that every experimental design being compared is run on the same sequence of random draws
set seed 10101

** Number of clusters (cities) drawn in every bootstrap sample; set to the size of the data: 60 cities
local lSampleSizeForCities = 60

****************************************************************
** Start the loop
****************************************************************
local i = 1
while `i'<=10000 {
display `i'
****************************************************************
** 1) Read in the real data

** Load the real data and keep only the variables used by the simulation.
** The file name is quoted so that a temp directory whose path contains spaces still works.
use "$temp/temp_data_for_montecarlo.dta", clear
keep vPage vLanguage vNumLanguage vNumPage vNumberOfTourists vLength2010 vLength2014 vAverUsers2014 vAverUsers2010

****************************************************************
** 1a) Drop or keep Dutch
** When lIncludeDutch is 0 the Dutch (nl) observations are removed; otherwise the data are left as they are.
if (`lIncludeDutch' == 0) {
	drop if vLanguage == "nl"
}

****************************************************************
** 2) Take a random sample with replacement and create a new location id vBootstrapLocationID
** Whole clusters of vPage are drawn. The new id runs from 1 to the number of clusters in the sample.
** Use the new id in all the calculations, because the same vPage can be drawn more than once.
bsample `lSampleSizeForCities', cluster(vPage) idcluster(vBootstrapLocationID)
*tabulate vLanguage

****************************************************************
** 3) Randomization of treatment
** Different randomization depending on the do file argument

** For some of the treatment randomization versions, need to generate treatment groups
** gen uniform random number (note that need a random number later in the other case, so do not delete it, otherwise if have to generate it again, the seed will be different in some of the cases), the number is from 0 to 1

** One uniform draw per observation. vUniformRandomNumber is kept, because treatment version 2 ranks on it.
generate double vUniformRandomNumber = runiform()

** Copy the draw of the tagged observation of each bootstrap location to all observations of that location.
egen vTagLocation = tag(vBootstrapLocationID)

** The helper is stored as double: with the default float type the draw would be rounded
** before it reaches the double variable vRandomNumberByLocation.
** Untagged observations get -99, which is below any uniform draw, so the maximum picks the tagged one.
generate double tempUniformRandomNumber = cond(vTagLocation == 1, vUniformRandomNumber, -99)
egen double vRandomNumberByLocation = max(tempUniformRandomNumber), by(vBootstrapLocationID)
drop temp*

****************************************************************
** Versions by sorting or not sorting and by sorting of different variables
****************************************************************

** Build vLocationGroup: consecutive blocks of 6 ids form one group.
** Which id is used depends on lRandomizationSortingType; type 3 creates no groups at all.
if (`lRandomizationSortingType' == 1) {
	** Version 1: order the locations by the number of tourists.
	** The sort seed is fixed, so that ties are ordered the same way every time.
	set sortseed 1001
	sort vNumberOfTourists vBootstrapLocationID

	** group numbers the combinations 1, 2, ... in the sort order of the variable list, so a higher id means more tourists.
	** vBootstrapLocationID is part of the list to separate cities with an equal number of tourists,
	** which is very likely in a bootstrap sample.
	egen vSortedByTouristsID = group(vNumberOfTourists vBootstrapLocationID)

	** Start everybody in group 1 and move up one group for every multiple of 6 that the id exceeds
	generate vLocationGroup = 1
	forvalues lUpperBound = 6(6)60 {
		replace vLocationGroup = vLocationGroup + 1 if (vSortedByTouristsID > `lUpperBound')
	}
}
else if (`lRandomizationSortingType' == 2) {
	** Version 2: same blocks of 6, but directly on the bootstrap location id
	generate vLocationGroup = 1
	forvalues lUpperBound = 6(6)60 {
		replace vLocationGroup = vLocationGroup + 1 if (vBootstrapLocationID > `lUpperBound')
	}
}
else if (`lRandomizationSortingType' == 3) {
	display "No sorting to groups"
}

****************************************************************
** Assigning treatment
****************************************************************
** Create the 0/1 variable vTreated according to lRandomizationTreatmentType
if (`lRandomizationTreatmentType' == 1) {
	** Version 1: each city is treated in 2 languages.
	** Needs vLocationGroup, which the grouping step creates only for sorting types 1 and 2.
	** The sort seed is fixed, so that ties are ordered the same way every time.
	set sortseed 1001
	sort vLocationGroup vRandomNumberByLocation vBootstrapLocationID

	** Within a location group, consecutive runs of 3 rows (4 rows when Dutch is kept) get treatment groups 1 to 6.
	** Rows beyond the fifth cut-off stay in group 6.
	** NOTE(review): presumably one run corresponds to one city with one row per language - confirm against the data.
	if (`lIncludeDutch' == 0) {
		by vLocationGroup: generate vTreatmentGroup = min(ceil(_n/3), 6)
	}
	else if (`lIncludeDutch' == 1) {
		by vLocationGroup: generate vTreatmentGroup = min(ceil(_n/4), 6)
	}
	tabulate vTreatmentGroup

	** Each treatment group is one of the six pairs that can be formed from de, fr, it and nl
	generate vTreated = 0
	replace vTreated = 1 if vTreatmentGroup == 1 & inlist(vLanguage, "de", "nl")
	replace vTreated = 1 if vTreatmentGroup == 2 & inlist(vLanguage, "fr", "it")
	replace vTreated = 1 if vTreatmentGroup == 3 & inlist(vLanguage, "de", "fr")
	replace vTreated = 1 if vTreatmentGroup == 4 & inlist(vLanguage, "it", "nl")
	replace vTreated = 1 if vTreatmentGroup == 5 & inlist(vLanguage, "de", "it")
	replace vTreated = 1 if vTreatmentGroup == 6 & inlist(vLanguage, "fr", "nl")
	summarize vTreated
	tabulate vLanguage vTreated
}
else if (`lRandomizationTreatmentType' == 2) {
	** Version 2: completely random.
	** Observations whose unique rank of the uniform draw is below the mean rank are treated.
	egen tempRank = rank(vUniformRandomNumber), unique
	summarize tempRank
	generate vTreated = (tempRank < `r(mean)')
	tabulate vLanguage vTreated
}
else if (`lRandomizationTreatmentType' == 3) {
	** Version 3: the locations with bootstrap id 1 to 30 are treated in all their rows
	generate vTreated = (vBootstrapLocationID <= 30)
	tabulate vLanguage vTreated
}

****************************************************************
** Loop over different treatment values
****************************************************************
** For every assumed treatment effect on length (105 to 120 percent of the 2014 value),
** build the outcome, run the regression and store one result row in matrix mA plus the effect number
forvalues lTreatEffectLength = 105(1)120 {

	** Turn the percent value into a multiplicative factor (105 becomes 1.05)
	local lTreatEffectLength2 = round(`lTreatEffectLength'/100, 0.01)
	** Show the factor of this pass (the original displayed the local of the users loop here)
	display "`lTreatEffectLength2'"

	** Create the treatment and control outcome: treated observations get the 2014 length
	** multiplied by the factor, control observations keep the 2014 length
	gen vExperimentLength = (1 - vTreated)*vLength2014 + vTreated * vLength2014 *`lTreatEffectLength2'
	tabstat vExperimentLength, by(vTreated)

	** Log difference between the simulated 2014 length and the 2010 length.
	** The variable name is written out in full, so the line does not depend on variable abbreviation being on.
	gen vDifLogLength_20142010 = log(vExperimentLength) - log(vLength2010)
	tabstat vDifLogLength_20142010, by(vTreated)

	** Estimate
	** Language FE
	regress vDifLogLength_20142010 vTreated i.vNumLanguage

	** Two-sided p-value of the treatment coefficient and the rejection indicators at 5 and 10 percent
	local ltstat = _b[vTreated]/_se[vTreated]
	local lpvalue = 2*ttail(e(df_r),abs(`ltstat'))
	local lReject5 = (`lpvalue'<0.05)
	local lReject10 = (`lpvalue'<0.1)

	** Result row: effect in percent, reject at 10 percent, reject at 5 percent
	matrix mA`lTreatEffectLength' = `lTreatEffectLength', `lReject10', `lReject5'

	** Remove the simulated variables, so that the next pass can create them again
	drop vExperimentLength vDifLogLength_20142010

}

****************************************************************
** For every assumed additive treatment effect on users (0.05 to 0.20),
** build the outcome, run the regression and store one result row in matrix mB plus the effect number
forvalues lTreatEffectUser = 5(1)20 {

	** Turn the loop value into the additive effect (5 becomes 0.05)
	local lTreatEffectUser2 = round(`lTreatEffectUser'/100, 0.01)
	display "`lTreatEffectUser2'"

	** Treated observations get the effect added to the 2014 average users
	generate vExperimentUsers = vAverUsers2014 + vTreated * (`lTreatEffectUser2')
	tabstat vExperimentUsers, by(vTreated) stat(mean p25 p50 p75)

	** Language FE, controlling for the 2010 average users
	regress vExperimentUsers vTreated i.vNumLanguage vAverUsers2010

	** Two-sided p-value of the treatment coefficient and the rejection indicators
	local lTStatistic = _b[vTreated]/_se[vTreated]
	local lTwoSidedP = 2*ttail(e(df_r), abs(`lTStatistic'))
	local lRejectAt5 = (`lTwoSidedP' < 0.05)
	local lRejectAt10 = (`lTwoSidedP' < 0.1)

	** Result row: effect size, reject at 10 percent, reject at 5 percent
	matrix mB`lTreatEffectUser' = (`lTreatEffectUser2', `lRejectAt10', `lRejectAt5')

	** Remove the simulated outcome, so that the next pass can create it again
	drop vExperimentUsers

}

****************************************************************
** Stack the one-row result matrices of the length loop (mA105 to mA120) into mOutputA
** and attach the result to the current estimation results, so that esttab can read it
matrix mOutputA = mA105
forvalues lEffect = 106/120 {
	matrix mOutputA = mOutputA \ mA`lEffect'
}
matrix list mOutputA
estadd matrix mOutputA

** Same for the users loop (mB5 to mB20)
matrix mOutputB = mB5
forvalues lEffect = 6/20 {
	matrix mOutputB = mOutputB \ mB`lEffect'
}
matrix list mOutputB
estadd matrix mOutputB

display "The iteration number"
display `i'

** The first iteration starts the output files from scratch, every later iteration appends to them
if (`i' == 1) {
	local lWriteMode "replace"
}
else {
	local lWriteMode "append"
}

** One pair of output files per combination of the do file arguments.
** The file names are quoted so that a temp directory whose path contains spaces still works.
local lFileSuffix "`lRandomizationSortingType'_`lRandomizationTreatmentType'_`lIncludeDutch'"

esttab e(mOutputA, fmt(%9.0f)) using "$temp/temp_length_`lFileSuffix'.csv", noobs nonum nomtitle compress fragment plain label varwidth(40) collabels(none) eqlabels(none) mlabels(none) `lWriteMode'

esttab e(mOutputB, fmt(%9.2f %9.0f %9.0f)) using "$temp/temp_users_`lFileSuffix'.csv", noobs nonum nomtitle compress fragment plain label varwidth(40) collabels(none) eqlabels(none) mlabels(none) `lWriteMode'

clear

****************************************************************
** 8) Loop over steps 1 - 7 some X times
** End the loop here
local i = `i' + 1
}

****************************************************************
